/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.tests;

import static org.junit.Assert.assertArrayEquals;

import java.util.ArrayList;
import java.util.List;

import junit.framework.TestCase;
import mosdi.util.Alphabet;
import mosdi.util.NamedSequence;
import mosdi.util.ProductEncoder;
import mosdi.util.SequenceUtils;

/**
 * Unit tests for {@link SequenceUtils}: sequence concatenation, reverse
 * complement, q-gram counting, and sequence count p-values.
 */
public class SequenceUtilsTest extends TestCase {

	/**
	 * Tolerance for comparisons whose expected values (e.g. 5.01) are not
	 * exactly representable as doubles, so that the outcome does not depend on
	 * the order in which the implementation accumulates its sums.
	 */
	private static final double ROUNDING_TOLERANCE = 1e-12;

	/**
	 * Returns a fresh copy of the index sequence shared by the q-gram tests:
	 * two stretches over the symbols 0..3, each followed by a -1 entry.
	 * A new array is created per call so that tests never share mutable state.
	 */
	private static int[] qGramTestSequence() {
		return new int[] {2, 2, 0, 3, 3, 0, 0, 0, 3, -1, 3, 2, 0, 3, 2, 0, 0, 1, 0, 0, 0, -1};
	}

	/**
	 * Checks {@code concatSequences} on two DNA sequences. With the flag set to
	 * {@code false} the expected result is every sequence followed by -1; with
	 * the flag set to {@code true} every sequence is additionally followed by
	 * its reverse complement, which is again followed by -1.
	 */
	public void testConcatSequences() {
		Alphabet alphabet = Alphabet.getDnaAlphabet();
		List<NamedSequence> sequences = new ArrayList<NamedSequence>();
		sequences.add(new NamedSequence("seq1", alphabet.buildIndexArray("AATACCGGAA")));
		sequences.add(new NamedSequence("seq2", alphabet.buildIndexArray("CCCGATTTTT")));

		int[] expectedForward = {
			0,0,3,0,1,1,2,2,0,0,-1,
			1,1,1,2,0,3,3,3,3,3,-1
		};
		int[] actualForward = SequenceUtils.concatSequences(sequences, false);
		assertArrayEquals("concatenation without reverse complements", expectedForward, actualForward);

		int[] expectedBothStrands = {
			0,0,3,0,1,1,2,2,0,0,-1,
			3,3,1,1,2,2,3,0,3,3,-1,
			1,1,1,2,0,3,3,3,3,3,-1,
			0,0,0,0,0,3,1,2,2,2,-1
		};
		int[] actualBothStrands = SequenceUtils.concatSequences(sequences, true);
		assertArrayEquals("concatenation with reverse complements", expectedBothStrands, actualBothStrands);
	}

	/**
	 * Checks that {@code reverseComplementaryCopy} reverses and complements a
	 * DNA sequence containing a separator character, and that the separator
	 * itself is carried over unchanged.
	 */
	public void testReverseComplement() {
		Alphabet alphabet = Alphabet.getDnaAlphabet();
		alphabet.setSeparator('$');
		int[] forward = alphabet.buildIndexArray("AATACC$GGAA");
		int[] reverseComplement = SequenceUtils.reverseComplementaryCopy(forward);
		assertEquals("reverse complement of AATACC$GGAA", "TTCC$GGTATT", alphabet.buildString(reverseComplement));
	}

	/**
	 * Checks the 2-gram counts returned by the three-argument
	 * {@code countQGrams}, looking each count up through the encoder obtained
	 * from {@code qGramEncoder}.
	 */
	public void testQGramCount() {
		final int q = 2;
		final int alphabetSize = 4;
		double[] wordFrequencies = SequenceUtils.countQGrams(q, alphabetSize, qGramTestSequence());
		ProductEncoder encoder = SequenceUtils.qGramEncoder(q, alphabetSize);
		// expectedCounts[a][b] is the expected number of occurrences of the 2-gram (a,b).
		double[][] expectedCounts = {
			{5, 1, 0, 3},
			{1, 0, 0, 0},
			{3, 0, 1, 0},
			{1, 0, 2, 1}
		};
		for (int first = 0; first < alphabetSize; ++first) {
			for (int second = 0; second < alphabetSize; ++second) {
				// Counts are small integers and therefore exact as doubles; no tolerance needed.
				assertEquals("count of 2-gram (" + first + "," + second + ")",
						expectedCounts[first][second],
						wordFrequencies[encoder.encode(first, second)],
						0.0);
			}
		}
	}

	/**
	 * Checks the 3-gram counts returned by {@code countQGrams} when 0.5 is
	 * passed as fourth argument: every expected entry is the plain occurrence
	 * count plus 0.5.
	 */
	public void testQGramCount2() {
		final int q = 3;
		final int alphabetSize = 4;
		double[] wordFrequencies = SequenceUtils.countQGrams(q, alphabetSize, qGramTestSequence(), 0.5);
		double[] expected = {
		//000 001 002 003  010 011 012 013  020 021 022 023  030 031 032 033
		  2.5,1.5,0.5,1.5, 1.5,0.5,0.5,0.5, 0.5,0.5,0.5,0.5, 0.5,0.5,1.5,1.5,
		//100 101 102 103  110 111 112 113  120 121 122 123  130 131 132 133
		  1.5,0.5,0.5,0.5, 0.5,0.5,0.5,0.5, 0.5,0.5,0.5,0.5, 0.5,0.5,0.5,0.5,
		//200 201 202 203  210 211 212 213  220 221 222 223  230 231 232 233
		  1.5,0.5,0.5,2.5, 0.5,0.5,0.5,0.5, 1.5,0.5,0.5,0.5, 0.5,0.5,0.5,0.5,
		//300 301 302 303  310 311 312 313  320 321 322 323  330 331 332 333
		  1.5,0.5,0.5,0.5, 0.5,0.5,0.5,0.5, 2.5,0.5,0.5,0.5, 1.5,0.5,0.5,0.5
		};
		// Multiples of 0.5 are exactly representable, so exact comparison is safe here.
		assertArrayEquals("3-gram counts with 0.5 added to each entry", expected, wordFrequencies, 0.0);
	}

	/**
	 * Checks the {@code countQGrams} variant that receives a pre-filled array:
	 * the 2-gram counts are expected to be added on top of the values already
	 * present in that array.
	 */
	public void testQGramCount3() {
		final int q = 2;
		final int alphabetSize = 4;
		double[] wordFrequencies = {
			0.01, 0.02, 0.03, 0.04,
			0.05, 0.06, 0.07, 0.08,
			0.09, 0.10, 0.11, 0.12,
			0.13, 0.14, 0.15, 0.16
		};
		SequenceUtils.countQGrams(q, alphabetSize, qGramTestSequence(), wordFrequencies);
		double[] expected = {
			5.01, 1.02, 0.03, 3.04,
			1.05, 0.06, 0.07, 0.08,
			3.09, 0.10, 1.11, 0.12,
			1.13, 0.14, 2.15, 1.16
		};
		// Sums such as 0.01 + 5 need not be bit-identical to the literal 5.01,
		// hence a small tolerance instead of exact equality.
		assertArrayEquals("2-gram counts added to pre-filled array", expected, wordFrequencies, ROUNDING_TOLERANCE);
	}

	/**
	 * Checks that {@code calculateSequenceCountPValue} agrees with the tail sum
	 * (indices 10 and above) of the array returned by
	 * {@code calculateSequenceCountDistribution}, and that passing {@code true}
	 * as last argument yields the natural logarithm of that tail sum. Both
	 * comparisons are relative.
	 */
	public void testSequenceCountPValue() {
		int[] sequenceLengths = {1000, 950, 1200, 2135, 1236, 8679, 234, 2345, 1200, 2135, 1236, 8679, 234, 2345};
		// First argument of calculateSequenceCountPValue and lower bound of the tail sum.
		final int threshold = 10;

		double[] dist = SequenceUtils.calculateSequenceCountDistribution(sequenceLengths, 0.00001, 10, 1.3, false);
		assertEquals("length of distribution", sequenceLengths.length + 1, dist.length);

		// Summed from the last entry downwards.
		double tailSum = 0.0;
		for (int i = dist.length - 1; i >= threshold; --i) {
			tailSum += dist[i];
		}

		double pvalue = SequenceUtils.calculateSequenceCountPValue(threshold, sequenceLengths, 0.00001, 10, 1.3, false);
		assertTrue("p-value " + pvalue + " differs from tail sum " + tailSum,
				Math.abs(1.0 - pvalue / tailSum) < 1e-10);

		double logPvalue = SequenceUtils.calculateSequenceCountPValue(threshold, sequenceLengths, 0.00001, 10, 1.3, true);
		double logTailSum = Math.log(tailSum);
		assertTrue("log p-value " + logPvalue + " differs from log tail sum " + logTailSum,
				Math.abs(1.0 - logPvalue / logTailSum) < 1e-8);
	}
}
